#DESeq for 3'-targeted RNA-seq data.  Genes correspond to proteins that were significantly loaded along MDS2 in NMDS of proteomics data for 400 vs. 2800 µatm (the axis that separated the oysters according to treatment).  Total reads are summed within CGIDs that were linked to isotigs #s through blastx.
setwd("/Users/emmatimminsschiffman/Documents/Dissertation/3' targeted illumina sequencing C. gigas/isotig RNA-Seq/compare with proteomics")

seq.dat<-read.csv('Highly sig CGIDs with total reads pCO2.csv', header=T, row.names=1)

seqdesign=data.frame(row.names=colnames(seq.dat), condition=c("highco2", 'highco2', 'highco2', 'highco2', 'lowco2', 'lowco2', 'lowco2','lowco2'), libType=c(rep('single-end',8)))
condition = factor(c("highco2", 'highco2', 'highco2', 'highco2', 'lowco2', 'lowco2', 'lowco2','lowco2'))

cds=newCountDataSet(seq.dat, condition)

cds=estimateSizeFactors(cds)
sizeFactors(cds)

#cds=estimateDispersions(cds)
#plotDispEsts(cds)
#error in parametricDispersionFit(means,disps)

#res=nbinomTest(cds, 'highco2', 'lowco2')